dNLS Preprocessing QC statistics ¶

July 2025 - with the new pipeline that uses new Brenner cutoffs, rescales tiles, and removes empty tiles and dead cells¶

In [1]:
import os
import sys

# Root of the NOVA code base. It is bound as a Python name as well as exported to the
# environment, because the HTML-export cell at the end of the notebook interpolates
# `NOVA_HOME` directly.
NOVA_HOME = '/home/projects/hornsteinlab/Collaboration/NOVA'
os.environ['NOVA_HOME'] = NOVA_HOME
# Put the project root on the import path so the `tools.*` imports below resolve.
sys.path.insert(1, NOVA_HOME)
print(f"NOVA_HOME: {NOVA_HOME}")

# Root under which the raw/processed image folders are looked up.
NOVA_DATA_HOME = '/home/projects/hornsteinlab/Collaboration/NOVA'
# Folder holding the preprocessing logs that log_files_qc reads.
LOGS_PATH = "/home/projects/hornsteinlab/Collaboration/NOVA/outputs/preprocessing/ManuscriptFinalData_80pct/dNLS_new_CLEAN/logs"
# Handed to the validation/diff helpers as their plot-path argument; None in this report.
PLOT_PATH = None

import pandas as pd
import numpy as np
import contextlib
import io
from IPython.display import display, Javascript

from tools.preprocessing_tools.qc_reports.qc_utils import log_files_qc, run_validate_folder_structure, display_diff, sample_and_calc_variance, \
                                                show_site_survival_dapi_brenner, show_site_survival_dapi_cellpose, \
                                                show_site_survival_dapi_tiling, show_site_survival_target_brenner, \
                                                calc_total_sums, plot_filtering_heatmap, show_total_sum_tables, \
                                                plot_cell_count, plot_catplot, plot_hm_of_mean_cell_count_per_tile, \
                                                run_calc_hist_new, show_total_valid_tiles_per_marker_and_batch
                                                
from tools.preprocessing_tools.qc_reports.qc_config import dnls_opera_panels, dnls_opera_markers, dnls_opera_marker_info, \
                                                           dnls_opera_cell_lines, \
                                                dnls_opera_cell_lines_to_cond, dnls_opera_cell_lines_for_disp, dnls_opera_reps, \
                                                dnls_opera_line_colors, dnls_opera_lines_order, dnls_opera_custom_palette, \
                                                dnls_opera_expected_dapi_raw, markers, custom_palette,dnls_opera_cell_lines_to_reps

%load_ext autoreload
%autoreload 2
NOVA_HOME: /home/projects/hornsteinlab/Collaboration/NOVA
In [2]:
# Batches covered by this report: batch1 through batch6.
batches = ['batch' + str(batch_number) for batch_number in range(1, 7)]
batches
Out[2]:
['batch1', 'batch2', 'batch3', 'batch4', 'batch5', 'batch6']
In [3]:
# Read the preprocessing logs of the selected batches into a single DataFrame.
# filename_split / site_location control how the site id is derived from the file name
# (the helper's own log line reports the resulting df.site_num value).
df = log_files_qc(LOGS_PATH, only_wt_cond=False, batches=batches, filename_split='-', site_location=0)
# Optional filter, currently disabled: drop the WT line.
# df = df[df.cell_line != 'WT']

# Split the log rows into DAPI rows and target-marker rows.
# Explicit copies make both frames independent of `df`, so helpers that later assign
# columns on them do not operate on a filtered slice (plot_catplot emits a
# SettingWithCopyWarning further down; NOTE(review): confirm the copy silences it).
df_dapi = df[df.marker == 'DAPI'].copy()
df_target = df[df.marker != 'DAPI'].copy()
reading logs of batch5
reading logs of batch6
reading logs of batch3
reading logs of batch2
reading logs of batch4
reading logs of batch1

Total of 6 files were read.
Before dup handeling  (281633, 21)
After duplication removal #1: (281633, 22)
After duplication removal #2: (281633, 22)

PAY ATTENTION!!!! df.site_num: r04c04f120, can be defined using filename_split & site_location
In [4]:
# Nancy: before removing corrupted tiles, we had: "Before dup handeling  (308234, 21)" and "After duplication removal #1: (300473, 22)"

Actual Files Validation¶

Raw Files Validation¶

  1. How many site tiff files do we have in each folder?
  2. Are all existing files valid? (tif, at least 2049kB, not corrupted)
In [5]:
# Location of the raw site images of the sorted dNLS OPERA batches.
root_directory_raw = os.path.join(
    NOVA_DATA_HOME, 'input', 'images', 'raw', 'OPERA_dNLS_6_batches_NOVA_sorted'
)

# Validate the raw folder tree and its files batch by batch. The result is kept for the
# raw-vs-processed comparison (display_diff) later in the notebook.
raws = run_validate_folder_structure(
    root_directory_raw,
    False,  # second positional flag: False for the raw tree (the processed call passes True)
    dnls_opera_panels,
    dnls_opera_markers.copy(),  # a copy of the shared marker list is handed over
    PLOT_PATH,
    dnls_opera_marker_info,
    dnls_opera_cell_lines_to_cond,
    dnls_opera_reps,
    dnls_opera_cell_lines_for_disp,
    dnls_opera_expected_dapi_raw,
    batches=batches,
    fig_width=2,
    fig_height=12,
    cell_lines_to_reps=dnls_opera_cell_lines_to_reps,
    expected_count=250,
    check_antibody=False,
)
batch1
Folder structure is valid.
No bad files are found.
Total Sites:  82000
========
batch2
Folder structure is valid.
No bad files are found.
Total Sites:  82000
========
batch3
Folder structure is valid.
No bad files are found.
Total Sites:  82000
========
batch4
Folder structure is valid.
No bad files are found.
Total Sites:  82000
========
batch5
Folder structure is valid.
No bad files are found.
Total Sites:  81736
========
batch6
Folder structure is valid.
No bad files are found.
Total Sites:  81997
========
====================

Processed Files Validation¶

  1. How many site npy files do we have in each folder? -> How many sites survived the pre-processing?
  2. Are all existing files valid? (at least 100kB, npy not corrupted)
In [6]:
# Location of the processed dNLS images.
root_directory_proc = os.path.join(NOVA_DATA_HOME, 'input', 'images', 'processed', 'ManuscriptFinalData_80pct', 'dNLS')

# Same validation as for the raw tree, with the second positional flag set to True.
# The marker list is passed as a copy, matching the raw-files call: dnls_opera_markers is
# reused by later cells, so the shared config list should not be exposed to in-place changes
# (presumably why the raw call copies it - confirm in run_validate_folder_structure).
procs = run_validate_folder_structure(root_directory_proc, True, dnls_opera_panels, dnls_opera_markers.copy(), PLOT_PATH, dnls_opera_marker_info,
                                    dnls_opera_cell_lines_to_cond, dnls_opera_reps, dnls_opera_cell_lines_for_disp, dnls_opera_expected_dapi_raw,
                                    fig_width=2, fig_height=12, cell_lines_to_reps=dnls_opera_cell_lines_to_reps,
                                    expected_count=250, check_antibody=False, batches=batches)
batch1
Folder structure is valid.
No bad files are found.
Total Sites:  33530
========
batch2
Folder structure is valid.
No bad files are found.
Total Sites:  34223
========
batch3
Folder structure is invalid. Missing 15 paths:
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/dNLS/batch3/dNLS/Untreated/G3BP1
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/dNLS/batch3/dNLS/Untreated/NEMO
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/dNLS/batch3/dNLS/Untreated/FMRP
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/dNLS/batch3/dNLS/Untreated/DCP1A
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/dNLS/batch3/dNLS/Untreated/PURA
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/dNLS/batch3/WT/Untreated/G3BP1
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/dNLS/batch3/WT/Untreated/PSD95
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/dNLS/batch3/WT/Untreated/NEMO
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/dNLS/batch3/WT/Untreated/LSM14A
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/dNLS/batch3/WT/Untreated/FMRP
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/dNLS/batch3/WT/Untreated/CLTC
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/dNLS/batch3/WT/Untreated/DCP1A
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/dNLS/batch3/WT/Untreated/HNRNPA1
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/dNLS/batch3/WT/Untreated/PURA
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/dNLS/batch3/WT/Untreated/Phalloidin
No bad files are found.
Total Sites:  9332
========
batch4
Folder structure is valid.
No bad files are found.
Total Sites:  65804
========
batch5
Folder structure is valid.
No bad files are found.
Total Sites:  75194
========
batch6
Folder structure is invalid. Missing 4 paths:
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/dNLS/batch6/WT/Untreated/GM130
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/dNLS/batch6/WT/Untreated/LSM14A
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/dNLS/batch6/WT/Untreated/Calreticulin
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/dNLS/batch6/WT/Untreated/HNRNPA1
No bad files are found.
Total Sites:  22430
========
====================

Difference between Raw and Processed¶

In [7]:
# Compare the raw and processed validation results for every batch.
display_diff(
    batches,
    raws,
    procs,
    PLOT_PATH,
    fig_width=2,
    fig_height=12,
)
batch1
========
batch2
========
batch3
========
batch4
========
batch5
========
batch6
========

Variance in each batch (of processed files)¶

In [8]:
# For each batch, compute the variance over a sample of processed files and print it.
# Anything the helper writes to stdout while running is captured and discarded.
for batch in batches:
    muted_output = io.StringIO()
    with contextlib.redirect_stdout(muted_output):
        var = sample_and_calc_variance(
            root_directory_proc,
            batch,
            sample_size_per_markers=500,
            cond_count=2,
            rep_count=len(dnls_opera_reps),
            num_markers=len(dnls_opera_markers),
        )
    print(f'{batch} var: ', var)
batch1 var:  0.047809197360007195
batch2 var:  0.04759923642111806
batch3 var:  0.04780787975481354
batch4 var:  0.04614634300003594
batch5 var:  0.04514271780294911
batch6 var:  0.04655098226554121

Preprocessing Filtering qc¶

By order of filtering

1. % site survival after Brenner on DAPI channel¶

Percentage out of the total sites

In [9]:
# Site survival after the Brenner filter on the DAPI channel.
# The returned table is the input of the Cellpose survival step.
dapi_filter_by_brenner = show_site_survival_dapi_brenner(
    df_dapi,
    batches,
    dnls_opera_line_colors,
    dnls_opera_panels,
    dnls_opera_reps,
    figsize=(3, 5),
    vmax=250,
    to_ignore={'cell_line_cond': 'WT Untreated', 'rep': 'rep3'},
)

2. % Site survival after Cellpose¶

Percentage out of the sites that passed the previous filter. In parentheses are absolute values.

A site will be filtered out if Cellpose found 0 cells in it.

In [10]:
# Site survival after Cellpose, relative to the sites that passed the DAPI Brenner filter.
# The returned table is the input of the tiling survival step.
dapi_filter_by_cellpose = show_site_survival_dapi_cellpose(
    df_dapi,
    batches,
    dapi_filter_by_brenner,
    dnls_opera_line_colors,
    dnls_opera_panels,
    dnls_opera_reps,
    figsize=(3, 5),
    to_ignore={'cell_line_cond': 'WT Untreated', 'rep': 'rep3'},
)
In [11]:
# Show the Cellpose survival table without its 'index' column.
# A disabled leftover check compared this view element-wise with the same view of
# dapi_filter_by_brenner.
dapi_filter_by_cellpose.drop('index', axis=1)
Out[11]:
batch cell_line_cond panel rep
0 batch1 WT Untreated panelA rep1
1 batch1 WT Untreated panelA rep2
2 batch1 WT Untreated panelA rep3
3 batch1 WT Untreated panelB rep1
4 batch1 WT Untreated panelB rep2
... ... ... ... ...
643 batch6 dNLS Untreated panelK rep2
644 batch6 dNLS Untreated panelK rep3
645 batch6 dNLS Untreated panelL rep1
646 batch6 dNLS Untreated panelL rep2
647 batch6 dNLS Untreated panelL rep3

648 rows × 4 columns

3. % Site survival by tiling¶

Percentage out of the sites that passed the previous filter. In parentheses are absolute values.

A site will be filtered out if, after tiling, no tile contains at least one whole cell that Cellpose detected.

In [12]:
# Site survival after tiling, relative to the sites that passed the Cellpose filter.
# The returned table is the input of the target-channel Brenner step.
dapi_filter_by_tiling = show_site_survival_dapi_tiling(
    df_dapi,
    batches,
    dapi_filter_by_cellpose,
    dnls_opera_line_colors,
    dnls_opera_panels,
    dnls_opera_reps,
    figsize=(3, 5),
    to_ignore={'cell_line_cond': 'WT Untreated', 'rep': 'rep3'},
)

4. % Site survival after Brenner on target channel¶

Percentage out of the sites that passed the previous filter. In parentheses are absolute values (if different from the percentages).

In [13]:
# TODO: awaiting Noam's help on this step.
# Unlike the other show_site_survival_* calls in this notebook, the following arguments are
# currently NOT passed (they are disabled in this call):
#   batches, dnls_opera_line_colors, dnls_opera_panels, dnls_opera_reps,
#   to_ignore={'cell_line_cond':'WT Untreated','rep':'rep3'}
show_site_survival_target_brenner(
    df_dapi,
    df_target,
    dapi_filter_by_tiling,
    dnls_opera_markers,
    figsize=(3, 12),
)

Statistics About the Processed Files¶

In [14]:
# Display labels for the two headline statistics (not referenced by the cells visible below).
names = [
    'Total number of tiles',
    'Total number of whole cells',
]
# Log columns handed to calc_total_sums.
stats = [
    'n_valid_tiles',
    'site_whole_cells_counts_sum',
    'site_cell_count',
    'site_cell_count_sum',
]
total_sum = calc_total_sums(df_target, df_dapi, stats, dnls_opera_markers)

Total tiles¶

In [15]:
# TODO: the marker selection needs to change depending on whether all markers are used or
# just the d8 ones. A disabled draft built the list from the generic `markers` config:
#   markers_for_dnls = markers.copy(); markers_for_dnls.remove('TIA1'); markers_for_dnls += ['TDP43B']

# Total number of valid tiles over all rows whose marker is in dnls_opera_markers.
total_sum.loc[total_sum.marker.isin(dnls_opera_markers), 'n_valid_tiles'].sum()
Out[15]:
1554279

Total whole nuclei in tiles¶

In [16]:
# Sum of site_whole_cells_counts_sum over the DAPI rows only.
total_sum.loc[total_sum.marker == 'DAPI', 'site_whole_cells_counts_sum'].sum()
Out[16]:
590376.0

Total nuclei in sites¶

In [17]:
# Sum of site_cell_count over the DAPI rows only.
total_sum.loc[total_sum.marker == 'DAPI', 'site_cell_count'].sum()
Out[17]:
2101477.0
In [18]:
# Show summary-statistics tables of total_sum: one per batch, followed by one for all batches.
show_total_sum_tables(total_sum)
n_valid_tiles % valid tiles site_whole_cells_counts_sum site_cell_count
batch1
count 321.00000 321.000000 321.000000 321.000000
mean 615.65109 6.156511 707.386293 2522.713396
std 555.82936 5.558294 630.358356 2301.253813
min 0.00000 0.000000 3.000000 10.000000
25% 179.00000 1.790000 141.000000 506.000000
50% 457.00000 4.570000 544.000000 1972.000000
75% 915.00000 9.150000 1065.000000 3715.000000
max 2535.00000 25.350000 2417.000000 8549.000000
sum 197624.00000 NaN 227071.000000 809791.000000
expected_count 450.00000 450.000000 450.000000 450.000000
n_valid_tiles % valid tiles site_whole_cells_counts_sum site_cell_count
batch2
count 325.000000 325.000000 325.000000 325.000000
mean 592.815385 5.928154 749.781538 2706.849231
std 545.695249 5.456952 654.901942 2436.478934
min 0.000000 0.000000 2.000000 2.000000
25% 144.000000 1.440000 118.000000 415.000000
50% 453.000000 4.530000 619.000000 2233.000000
75% 897.000000 8.970000 1186.000000 4415.000000
max 2190.000000 21.900000 2561.000000 9748.000000
sum 192665.000000 NaN 243679.000000 879726.000000
expected_count 450.000000 450.000000 450.000000 450.000000
n_valid_tiles % valid tiles site_whole_cells_counts_sum site_cell_count
batch3
count 254.000000 254.000000 254.000000 254.000000
mean 198.346457 1.983465 262.043307 884.850394
std 331.865225 3.318652 450.839614 1522.781484
min 0.000000 0.000000 0.000000 0.000000
25% 3.000000 0.030000 5.000000 12.000000
50% 30.500000 0.305000 27.000000 107.000000
75% 243.000000 2.430000 344.000000 1132.250000
max 1548.000000 15.480000 2318.000000 7811.000000
sum 50380.000000 NaN 66559.000000 224752.000000
expected_count 450.000000 450.000000 450.000000 450.000000
n_valid_tiles % valid tiles site_whole_cells_counts_sum site_cell_count
batch4
count 328.000000 328.000000 328.000000 3.280000e+02
mean 1332.027439 13.320274 1585.518293 5.771637e+03
std 463.730917 4.637309 604.655531 2.194285e+03
min 82.000000 0.820000 90.000000 3.050000e+02
25% 1038.000000 10.380000 1184.000000 4.270000e+03
50% 1416.000000 14.160000 1521.000000 5.504000e+03
75% 1601.000000 16.010000 2114.250000 7.605250e+03
max 2354.000000 23.540000 2818.000000 9.954000e+03
sum 436905.000000 NaN 520050.000000 1.893097e+06
expected_count 450.000000 450.000000 450.000000 4.500000e+02
n_valid_tiles % valid tiles site_whole_cells_counts_sum site_cell_count
batch5
count 328.000000 328.000000 328.000000 3.280000e+02
mean 1602.609756 16.026098 2150.393293 7.638360e+03
std 455.530413 4.555304 475.977617 1.774749e+03
min 392.000000 3.920000 1125.000000 4.181000e+03
25% 1398.500000 13.985000 1730.500000 5.868000e+03
50% 1652.500000 16.525000 2215.000000 7.932000e+03
75% 1918.500000 19.185000 2536.000000 9.038000e+03
max 2491.000000 24.910000 3248.000000 1.161400e+04
sum 525656.000000 NaN 705329.000000 2.505382e+06
expected_count 450.000000 450.000000 450.000000 4.500000e+02
n_valid_tiles % valid tiles site_whole_cells_counts_sum site_cell_count
batch6
count 301.00000 301.000000 301.000000 301.000000
mean 501.82392 5.018239 796.152824 2748.262458
std 651.54062 6.515406 962.054238 3370.994066
min 0.00000 0.000000 0.000000 1.000000
25% 20.00000 0.200000 17.000000 61.000000
50% 205.00000 2.050000 307.000000 1040.000000
75% 945.00000 9.450000 1386.000000 4681.000000
max 2492.00000 24.920000 3386.000000 12045.000000
sum 151049.00000 NaN 239642.000000 827227.000000
expected_count 450.00000 450.000000 450.000000 450.000000
n valid tiles % valid tiles site_whole_cells_counts_sum site_cell_count
All batches
count 1.857000e+03 1857.000000 1.857000e+03 1.857000e+03
mean 8.369838e+02 8.369838 1.078261e+03 3.844898e+03
std 7.093491e+02 7.093491 9.057576e+02 3.260056e+03
min 0.000000e+00 0.000000 0.000000e+00 0.000000e+00
25% 1.440000e+02 1.440000 1.480000e+02 5.280000e+02
50% 6.940000e+02 6.940000 9.880000e+02 3.403000e+03
75% 1.466000e+03 14.660000 1.793000e+03 6.453000e+03
max 2.535000e+03 25.350000 3.386000e+03 1.204500e+04
sum 1.554279e+06 NaN 2.002330e+06 7.139975e+06
expected_count 4.500000e+02 450.000000 4.500000e+02 4.500000e+02

Show Total Tile Counts¶

For each batch, cell line, replicate and marker: Total number of tiles

First, we look at all cell lines together:¶

In [19]:
# Valid-tile totals per marker and batch, with all cell lines pooled.
# vmax=1000 is forwarded to the helper (presumably the upper end of the colour scale - confirm).
show_total_valid_tiles_per_marker_and_batch(total_sum, vmax=1000)

Separating into cell lines & batches:¶

In [20]:
# Expose n_valid_tiles under the column name 'index' before plotting
# (presumably the column plot_filtering_heatmap draws - confirm in qc_utils).
to_heatmap = total_sum.rename(columns=dict(n_valid_tiles='index'))
plot_filtering_heatmap(
    to_heatmap,
    extra_index='marker',
    vmin=None,
    vmax=None,
    xlabel='Total number of tiles',
    show_sum=True,
    figsize=(3, 12),
    fmt=".0f",
)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:391: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:391: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:391: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:391: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:391: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:391: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)

Show Total Whole Cell Counts¶

For each batch, cell line, replicate and marker: Total number of whole cells

In [21]:
# Expose site_whole_cells_counts_sum under the column name 'index' before plotting
# (presumably the column plot_filtering_heatmap draws - confirm in qc_utils).
to_heatmap = total_sum.rename(columns=dict(site_whole_cells_counts_sum='index'))
plot_filtering_heatmap(
    to_heatmap,
    extra_index='marker',
    vmin=None,
    vmax=None,
    xlabel='Total number of whole cells',
    show_sum=True,
    figsize=(3, 10),
    fmt=".0f",
)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:391: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:391: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:391: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:391: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:391: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:391: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_yticklabels(ax.get_yticklabels(), fontsize=6)

Show Cell Count Statistics per Batch¶

In [22]:
# Keep only the DAPI rows of sites that have at least one valid tile.
df_no_empty_sites = df_dapi[df_dapi.n_valid_tiles != 0]

# One plot per count column, all drawn with the same line order and palette.
for count_column, plot_title in (
    ('site_cell_count_sum', 'Cell Count Average per Site (from tiles)'),
    ('site_whole_cells_counts_sum', 'Whole Cell Count Average per Site'),
    ('site_cell_count', 'Cellpose Cell Count Average per Site'),
):
    plot_cell_count(
        df_no_empty_sites,
        dnls_opera_lines_order,
        dnls_opera_custom_palette,
        y=count_column,
        title=plot_title,
    )

Show Tiles per Site Statistics¶

In [23]:
# Mean number of valid tiles per site (DAPI rows), for each cell line / condition.
df_dapi.groupby('cell_line_cond')['n_valid_tiles'].mean()
Out[23]:
cell_line_cond
WT Untreated      3.174393
dNLS DOX          5.061329
dNLS Untreated    3.989770
Name: n_valid_tiles, dtype: float64
In [24]:
# Mean site_cell_count over all DAPI rows, kept as a one-entry Series.
df_dapi.loc[:, ['site_cell_count']].mean()
Out[24]:
site_cell_count    19.46569
dtype: float64
In [25]:
# Valid-tile counts per site for batches 1 to 6.
# NOTE(review): this passes the generic `custom_palette`, whereas the cell-count plots use
# `dnls_opera_custom_palette` - confirm that this is intended.
plot_catplot(
    df_dapi,
    custom_palette,
    dnls_opera_reps,
    x='n_valid_tiles',
    x_title='valid tiles count',
    batch_min=1,
    batch_max=6,
    height=6,
)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1058: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[:, 'batch_rep'] = df['batch'] + " " + df['rep']

Show Mean of cell count in valid tiles¶

In [26]:
# Heatmaps of the mean cell count in valid tiles: rows are cell line / condition,
# columns are panels, split by replicate.
plot_hm_of_mean_cell_count_per_tile(
    df_dapi,
    split_by='rep',
    rows='cell_line_cond',
    columns='panel',
    figsize=(14, 3),
)

Assessing Staining Reproducibility and Outliers¶

In [27]:
# for batch in batches:
#     print(batch)
#     run_calc_hist_new(f'{batch}', dnls_opera_cell_lines_for_disp, dnls_opera_markers,
#                       root_directory_raw, root_directory_proc,
#                            hist_sample=10,sample_size_per_markers=200, ncols=8, nrows=4, dnls=True)
#     print("="*30)
In [ ]:
# Save the notebook, then export it as HTML into the manuscript QC-report folder.
# `display` and `Javascript` are imported in the first cell of the notebook.
# NOTE(review): `IPython.notebook` is the JavaScript object of the classic Notebook
# front-end; in other front-ends this save request may do nothing - save manually if unsure.
display(Javascript('IPython.notebook.save_checkpoint();'))

# NOVA_HOME is set as an environment variable in the first cell, so it is read from there;
# interpolating a bare `NOVA_HOME` name raised NameError because no such variable is defined.
# The notebook path below is relative, so the kernel's working directory must be the
# repository root for nbconvert to find it.
nova_home = os.environ['NOVA_HOME']
os.system(f'jupyter nbconvert --to html tools/preprocessing_tools/qc_reports/qc_report_dNLS_Opera.ipynb --output {nova_home}/manuscript/preprocessing_qc_reports/qc_report_dNLS_Opera.html')
# Alternative (disabled): write the HTML next to the notebook itself.
# os.system(f'jupyter nbconvert --to html tools/preprocessing_tools/qc_reports/qc_report_dNLS_Opera.ipynb')